home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Aminet 22
/
Aminet 22 (1997)(GTI - Schatztruhe)[!][Dec 1997].iso
/
Aminet
/
mus
/
misc
/
MusicIn.lha
/
musicin
/
FastFT_float_asm.a
< prev
next >
Wrap
Text File
|
1997-09-23
|
15KB
|
509 lines
;--------------------------------------------------------------------------------------------
;-------- Fast Fourier Transformation for real-valued inputs ---------------
;-- ©1997 Henryk "Buggs" Richter, tfa652@cks1.rz.uni-rostock.de --
;-- --
;-- able to transform 1 real valued array of input data at highspeed --
;-- --
;-- Inputs & Outputs are single precision floating point variables --
;-- --
;-- Requirements: 68020 + FPU --
;-- --
;-- date: 17.09.97 --
;--------------------------------------------------------------------------------------------
;
Gamma EQU 9 ;N = 2^Gamma (max input points this routine has got data arrays for)
section blah,code
XDEF @ASM_FastFT_main_loop
XDEF _ASM_FastFT_main_loop ; for SAS-C 6.5
XDEF @ASM_FastFT_energy_phi
XDEF _ASM_FastFT_energy_phi ; for SAS-C 6.5
;-------------------------------------------------------------------------------------------
; create Power spectrum + phase angle (based on Stéphane TAVENARD`s routine)
;
;Inputs: a0: float *x_real
; a1: float *x_imag
; a2: float *energy
; a3: float *phi
; d0: n
;
_ASM_FastFT_energy_phi
@ASM_FastFT_energy_phi
movem.l a2-a6,-(sp)
lsr.w #1,d0 ; process only n/2 + 1 data items (=HBLKSIZE)
FFT_energy_phi_0
fmove.s (a0)+,fp0
fmove.s (a1)+,fp1
fmove.x fp0,fp4
fmove.x fp1,fp5
fmul.x fp4,fp4 ; (#2)
fmul.x fp5,fp5 ; (#2)
fadd.x fp4,fp5 ; fp5 = en = real^2 + imag^2
fmove.s fp5,(a2)+ ; *energy++ = en
ftst.x fp5
fbne.w FFT_energy_phi_1 ; en <> 0
fmove.s #0,fp1 ; phi = 0
bra.s FFT_energy_phi_5
FFT_energy_phi_1 ftst.x fp0
fbeq.w FFT_energy_phi_3 ; real == 0
FFT_energy_phi_2 fdiv.x fp0,fp1 ; fp1 = imag / real (#2)
fatan.x fp1,fp1 ; fp1 = atan( imag / real )
bra.s FFT_energy_phi_5
FFT_energy_phi_3
fmove.s #1.570796327,fp2 ; ¶/2 (90° in radiant)
ftst.x fp1
fbgt.w FFT_energy_phi_90
fneg.x fp2 ; -¶/2
FFT_energy_phi_90
fmove.x fp2,fp1
FFT_energy_phi_5 fmove.s fp1,(a3)+ ; *phi++ = phi
dbra d0,FFT_energy_phi_0
movem.l (sp)+,a2-a6
rts
_ASM_FastFT_main_loop
@ASM_FastFT_main_loop
;-------------------------------------------------------------------------------------------
;Inputs: d0: power
; a0: float *x_real ;x_real[k] with k=0,1,...,(1<<power)-1
; a1: float *x_imag ;x_imag[k]=0
; (a1 as Input pointer irrelevant since only real-valued samples need to be transformed)
; a2: float *SinCosTab
; a3: float *SinCosTab2
; (a5: WORD *BitreverseTab) ;at the moment this table kept local
;
;Outputs: x_real[k], x_imag[k] with k=0,1,...,(1<<power/2)-1
; is the fourier transformed array[1<<power] only representing the
; positive frequency range (the negative part is mirrored anyway
; and wouldn`t contain extra information == redundant)
;
movem.l d0-d7/a0-a6,-(sp)
subq.w #1,d0 ;this FFT needs only half of
;the operations due to
;real valued input samples
;
lea BitreverseTab,a5 ;Bit reversing (Butterfly)
cmp.w #Gamma,d0 ;more data than we`ve got data arrays for ?
bhi FFT_Fail
cmp.l Last_Gamma,d0 ;need to create tables ?
beq SkipTables
move.l d0,Last_Gamma
bsr MakeBitreversetabs
SkipTables
movem.l d0/a0/a1/a3,-(sp) ;need to be restored upon writing
;the N*2 transformed output out
;of the N points transformation
bsr MakeIntab ;reorganize input tables for FFT:
;assumed, you wanna transform 1024
;successive points of the form:
;
;x(k) ; k=0,1,...,2N-1
;
;then you have to split the data
;in the following way:
;
;Realtab(n)=x(2n) \ n=0,1,...,N-1
;Imgtab(n) =x(2n+1) /
;
lea Realtab,a0
lea Imgtab,a1
;------------------------------- InitWerte ------------------------------------------------------
moveq #0,d1
bset d0,d1 ;1<<Gamma = N
move.l d1,d5
lsl.w #2,d5
subq.l #1,d5 ;N*4-1
lsl #1,d1 ;N2=N/2 (*4 because of longword-wide mem access)
move.w d0,d2 ;Gamma
subq.w #1,d2 ;NU1=Gamma-1
moveq #0,d3 ;K=0
move.l a2,a3 ;Sine- and Cosine table for N points, bitreversed
cmp #2,d2 ;< 8 points to transform ?
blt FFT_Standard
;-----------------------------------------------------------------------------------------------
;----------------------- 1st FFT run, optimized for sin=0 & cos=1 ------------------------------
;-----------------------------------------------------------------------------------------------
move.w d1,a2 ;save d1, make DBF-Loop possible
lsr.w #2,d1 ;d1 is originally 4 * N/2
subq.w #1,d1 ;Loop from I to N2
move.w a2,d7 ;N2
add.w d3,d7 ;K+N2
FFTopt_Innerloop
;----- real part ---------
fmove.s (a0,d7.w),fp0 ;XReal(k+N2)
fmove.s (a1,d7.w),fp4 ;XImag(k+N2)
fmove.x fp4,fp6
fmove.s (a0,d3.w),fp6 ;XReal(K)
fsub.x fp0,fp6 ;minus Treal
fmove.s fp6,(a0,d7.w) ;XReal(K+N2)=XReal(k)-TReal
fadd.x fp0,fp6 ;-> =XReal(k)
fadd.x fp0,fp6 ;Xreal(k)+TReal
fmove.s fp6,(a0,d3.w) ;write conjugated complex mirror result
;----- imaginary part ------
fmove.s (a1,d3.w),fp0 ;XImag(K)
fsub.x fp4,fp0 ;minus TImag
fmove.s fp0,(a1,d7.w) ;XImag(K+N2)=XImag(k)-TImag
fadd.x fp4,fp0 ;-> =XImag(k)
fadd.x fp4,fp0 ;XImag(k)+TImag
fmove.s fp0,(a1,d3.w) ;write conjugated complex mirror result
addq.w #4,d3 ;k=k+1
addq.w #4,d7 ;k+N2=k+N2+1
dbf d1,FFTopt_Innerloop
move.w a2,d1
;------------------ one complete iteration done, next one -----------------------------------
moveq #0,d3 ;K=0
lsr.w #1,d1 ;N2=N2/2
subq #1,d2
;-----------------------------------------------------------------------------------------------
;---------- 2nd FFT run, optimized for sin=0 & cos=1 (and sin=1 & cos=0) --------------------
;-----------------------------------------------------------------------------------------------
move.w d1,a2 ;save d1, make DBF-Loop possible
lsr.w #2,d1 ;d1 is originally 4 * N/2
subq.w #1,d1 ;Loop from I to N2
move.w a2,d7 ;N2
add.w d3,d7 ;K+N2
FFTopt2_Innerloop
;----- real part ---------
fmove.s (a0,d7.w),fp0 ;XReal(k+N2)
fmove.s (a1,d7.w),fp4 ;XImag(k+N2)
fmove.x fp4,fp6
fmove.s (a0,d3.w),fp6 ;XReal(K)
fsub.x fp0,fp6 ;minus Treal
fmove.s fp6,(a0,d7.w) ;XReal(K+N2)=XReal(k)-TReal
fadd.x fp0,fp6 ;-> =XReal(k)
fadd.x fp0,fp6 ;Xreal(k)+TReal
fmove.s fp6,(a0,d3.w) ;write conjugated complex mirror result
;----- imaginary part ------
fmove.s (a1,d3.w),fp0 ;XImag(K)
fsub.x fp4,fp0 ;minus TImag
fmove.s fp0,(a1,d7.w) ;XImag(K+N2)=XImag(k)-TImag
fadd.x fp4,fp0 ;-> =XImag(k)
fadd.x fp4,fp0 ;XImag(k)+TImag
fmove.s fp0,(a1,d3.w) ;write conjugated complex mirror result
addq.w #4,d3 ;k=k+1
addq.w #4,d7 ;k+N2=k+N2+1
dbf d1,FFTopt2_Innerloop
move.w a2,d1
add.w d1,d3
;------------------ 1st half of this iteration done ----------------------------------------
;optimized for sin=1 , cos = 0
move.w d1,a2 ;save d1, make DBF-Loop possible
lsr.w #2,d1 ;d1 is originally 4 * N/2
subq.w #1,d1 ;Loop from I to N2
move.w a2,d7 ;N2
add.w d3,d7 ;K+N2
FFTopt3_Innerloop
;----- real part ---------
fmove.s (a0,d7.w),fp1 ;XReal(k+N2)
fmove.s (a1,d7.w),fp0 ;XImag(k+N2)
fmove.s (a0,d3.w),fp6 ;XReal(K)
fsub.x fp0,fp6 ;minus Treal
fmove.s fp6,(a0,d7.w) ;XReal(K+N2)=XReal(k)-TReal
fadd.x fp0,fp6 ;-> =XReal(k)
fadd.x fp0,fp6 ;Xreal(k)+TReal
fmove.s fp6,(a0,d3.w) ;write conjugated complex mirror result
;----- imaginary part ------
fmove.s (a1,d3.w),fp0 ;XImag(K)
fadd.x fp1,fp0 ;minus TImag
fmove.s fp0,(a1,d7.w) ;XImag(K+N2)=XImag(k)-TImag
fsub.x fp1,fp0 ;-> =XImag(k)
fsub.x fp1,fp0 ;XImag(k)+TImag
fmove.s fp0,(a1,d3.w) ;write conjugated complex mirror result
addq.w #4,d3 ;k=k+1
addq.w #4,d7 ;k+N2=k+N2+1
dbf d1,FFTopt3_Innerloop
move.w a2,d1
;------------------ one complete iteration done, next one -----------------------------------
moveq #0,d3 ;K=0
lsr.w #1,d1 ;N2=N2/2
subq #1,d2
FFT_Standard
;-----------------------------------------------------------------------------------------------
;--------------------------------- FFT routine main loop ---------------------------------------
;-----------------------------------------------------------------------------------------------
FFT_Outerloop
FFT_Innerloop2
move.w d1,a2 ;save d1, make DBF-Loop possible
lsr.w #2,d1 ;d1 is originally 4 * N/2
subq.w #1,d1 ;Loop from I to N2
move.w a2,d7 ;N2
add.w d3,d7 ;K+N2
move.w d3,d4 ;M = K % 2^NU1
lsr.w d2,d4 ;
and.w #~3,d4 ;
;---------------- Bit reversing P=IBR(M) already done --------------------
fmove.s 4(a3,d4.w*2),fp5 ;cos(M)
fmove.s (a3,d4.w*2),fp7 ;sin(M)
;W^P = e^(2*PI*P/N)
;or real=cos(2*PI*P/N)
; img =sin(2*PI*P/N)
FFT_Innerloop
;----- real part ---------
fmove.s (a0,d7.w),fp0 ;XReal(k+N2)
fmove.x fp0,fp1
fmove.s (a1,d7.w),fp4 ;XImag(k+N2)
fmove.x fp4,fp6
fmul.x fp5,fp0 ;Xreal*cos
fmul.x fp5,fp4 ;XImag*cos
fmul.x fp7,fp6 ;XImag*sin
fadd.x fp6,fp0 ;Treal= XReal*cos+Ximag*sin
fmove.s (a0,d3.w),fp6 ;XReal(K)
fsub.x fp0,fp6 ;minus Treal
fmove.s fp6,(a0,d7.w) ;XReal(K+N2)=XReal(k)-TReal
fadd.x fp0,fp6 ;-> =XReal(k)
fadd.x fp0,fp6 ;Xreal(k)+TReal
fmove.s fp6,(a0,d3.w) ;write conjugated complex mirror result
;----- imaginary part ------
;ximag*cos (see above) is in fp4
fmul.x fp7,fp1 ;xreal(k+n2) * sin
fsub.x fp1,fp4 ;timag = ximag*cos - xreal*sin
fmove.s (a1,d3.w),fp0 ;XImag(K)
fsub.x fp4,fp0 ;minus TImag
fmove.s fp0,(a1,d7.w) ;XImag(K+N2)=XImag(k)-TImag
fadd.x fp4,fp0 ;-> =XImag(k)
fadd.x fp4,fp0 ;XImag(k)+TImag
fmove.s fp0,(a1,d3.w) ;write conjugated complex mirror result
addq.w #4,d3 ;k=k+1
addq.w #4,d7 ;k+N2=k+N2+1
dbf d1,FFT_Innerloop
move.w a2,d1
add.w d1,d3 ;k=k+N2
cmp.w d5,d3 ;N*4-1 ?
blt.w FFT_Innerloop2
;------------------ one complete iteration done, next one -----------------------------------
moveq #0,d3 ;K=0
lsr.w #1,d1 ;N2=N2/2
dbf d2,FFT_Outerloop ;NU1=NU1-1 -> to NU1 = 0 proceeded, at <0 done
;---------------------------- reorder the results (Butterfly) and ----------------------
;--- get the coefficients for the N*2 point FFT back from the coeffs of N point FFT ----
movem.l (sp),d0/a0/a1/a3 ;restore Data area for writing Output
move.l a0,a2
move.l a1,a4
lea Realtab,a0
lea Imgtab,a1
moveq #0,d1
bset d0,d1
move.w d1,d2 ;N
subq.w #1,d1 ;N-1
moveq #1,d0
fmove.s #0.5,fp6
fmove.s (a0),fp2 ;Xreal(0)
fmove.s (a1),fp4 ;Ximag(0)
fadd.x fp4,fp2 ;Xreal + Ximag
fmove.s fp2,(a2)+ ;Gleichanteil
clr.l (a4)+ ;Phasendrehung für Gleichspannung ist immer 0 -> Ximag = 0
FFT_reorg
move.w (a5,d0.w*2),d4 ;i=IBR(k)
move.w (a5,d1.w*2),d5 ;i=IBR(k)
fmove.s (a0,d4.w*4),fp2 ;Xreal(n)
fmove.s (a0,d5.w*4),fp3 ;Xreal(N-n)
fmove.s (a1,d4.w*4),fp4 ;Ximag(n)
fmove.s (a1,d5.w*4),fp5 ;Ximag(N-n)
fmul.x fp6,fp2 ;divide all values by 2
fmul.x fp6,fp3
fmul.x fp6,fp4
fmul.x fp6,fp5
fadd.x fp3,fp2 ;R(n)+R(N-n)
fmove.x fp2,fp0
fsub.x fp3,fp2
fsub.x fp3,fp2 ;R(n)-R(N-n)
fmove.x fp2,fp3 ;R(n)-R(N-n)
fmul.s (a3,d0.w*8),fp2 ;sin ¶n/N * ( R(n)-R(N-n) )
fmul.s 4(a3,d0.w*8),fp3 ;cos ¶n/N * ( R(n)-R(N-n) )
fsub.x fp2,fp0
fsub.x fp1,fp1
fsub.x fp3,fp1 ;-[ cos ¶n/N * ( R(n)-R(N-n) ) ]
fsub.x fp5,fp4 ;I(n)-I(N-n)
fadd.x fp4,fp1 ;+I(n)-I(N-n)
fadd.x fp5,fp4
fadd.x fp5,fp4 ;I(n)+I(N-n)
fmove.x fp4,fp5
fmul.s (a3,d0.w*8),fp4 ;sin ¶n/N * ( I(n)+I(N-n) )
fmul.s 4(a3,d0.w*8),fp5 ;cos ¶n/N * ( I(n)+I(N-n) )
fsub.x fp4,fp1
fmove.s fp1,(a4)+ ;-[ sin ¶n/N * ( I(n)+I(N-n) ) ]
fadd.x fp5,fp0
fmove.s fp0,(a2)+ ;+[ cos ¶n/N * ( I(n)+I(N-n) ) ]
subq.w #1,d1
addq.w #1,d0
cmp.w d2,d0
blt.w FFT_reorg
fmove.s (a0),fp2 ;Xreal(0)
fmove.s (a1),fp4 ;Ximag(0)
movem.l (sp)+,d0/a0/a1/a3 ;restore Data area for writing Output
fsub.x fp4,fp2 ;Xreal - Ximag
fsub.s (a0),fp2
fmove.s fp2,(a2)+ ;Frequency N (Samplingrate / 2)
clr.l (a4)+ ;Phase for fmax = 0 or 180° -> Imag always 0
FFT_Fail
movem.l (sp)+,d0-d7/a0-a6
rts
;----------------------- Create the local Bitreverse table -----------------------------
;Input: d0: power (used by this FFT, input Power / 2)
; a5: WORD *BitreverseTab
;
MakeBitreversetabs:
movem.l d0-d7/a0-a6,-(sp)
moveq #0,d5
bset d0,d5
subq.w #1,d5 ;N-1
move d0,d6 ;Anzahl der Bits
moveq #0,d1
BRT_Innerloop1
moveq #0,d3
move d1,d2
move d6,d7
subq #1,d7
BRT_Innerloop2
roxr #1,d2
roxl #1,d3
dbf d7,BRT_Innerloop2
move d3,(a5)+
addq #1,d1
dbra d5,BRT_Innerloop1
movem.l (sp)+,d0-d7/a0-a6
rts
;----------------------- Create input data array for FFT -------------------------------
;Input: d0: power (used by this FFT, input Power / 2)
; a0: float *x_real
;
;used Globals:
; *Realtab
; *Imgtab
MakeIntab
movem.l d0-d7/a0-a6,-(sp)
lea Realtab,a2
lea Imgtab,a3
moveq #0,d1
bset d0,d1
subq.w #1,d1
IN_tab
move.l (a0)+,(a2)+ ;g(k)=x(2k) k=0,1,...,N*2-1
move.l (a0)+,(a3)+ ;h(k)=x(2k+1)
dbra d1,IN_tab
movem.l (sp)+,d0-d7/a0-a6
rts
section __MERGED,DATA
Last_Gamma dc.l 0 ;check for nesessity of creating the tables
section __MERGED,BSS ;rename to __MERGED for SAS
Realtab ds.l 1<<Gamma ;N values (float)
Imgtab ds.l 1<<Gamma ;N values (float)
BitreverseTab ds.w 1<<Gamma ;N values (WORD)
END